rm(list=ls())
library(MASS)
library(CBPS)

###-------------------------------------
# Required for Zhu et al method
###-------------------------------------
#library(gbm)  #Difficulties attaching; will use gbm::x instead below
library(survey)
library(mboost)
library(party)
library(energy)

F.aac.iter=function(i,ps.model,data,ps.num,rep,criterion) { 
  GBM.fitted=gbm::predict.gbm(ps.model,newdata=data,n.trees=floor(i),type="response") 
  ps.den=dnorm((data$T-GBM.fitted)/sd(data$T-GBM.fitted),0,1)
  wt=ps.num/ps.den
  aac_iter=rep(NA,rep)
  for (i in 1:rep){
    bo=sample(1:dim(data)[1],replace=TRUE,prob=wt)
    newsample=data[bo,]
    j.drop=match(c("T"),names(data))
    j.drop=j.drop[!is.na(j.drop)]
    x=newsample[,-j.drop]
    if(criterion=="spearman"|criterion=="kendall"){
      ac=apply(x, MARGIN=2, FUN=cor, y=newsample$T,method=criterion)
    } else if (criterion=="distance"){
      ac=apply(x, MARGIN=2, FUN=dcor, y=newsample$T) 
    } else if (criterion=="pearson"){
      ac=matrix(NA,dim(x)[2],1)
      for (j in 1:dim(x)[2]){
        ac[j]=ifelse (!is.factor(x[,j]), cor(newsample$T, x[,j], 
                                             method=criterion),polyserial(newsample$T, x[,j]))
      }
    } else print("The criterion is not correctly specified")
    aac_iter[i]=mean(abs(1/2*log((1+ac)/(1-ac))),na.rm=TRUE)
  }
  aac=mean(aac_iter)
  return(aac)
}

getzhu=function(Y,T,X){
  opt.reps=10  #was 50 in Zhu's code
  gammahat=rep(NA,6)   #6 methods in total are considered here
  SE=rep(NA,6)
  w.out=matrix(NA,nrow=length(Y),ncol=6)
  mydata=data.frame(T=T,X=X)

  # all models use the same numerator:
  model.num=lm(T~1,data=mydata)
  ps.num=dnorm((mydata$T-model.num$fitted)/(summary(model.num))$sigma,0,1)

  print("GBM: Pearson (2)")
  model.den=gbm::gbm(T~.,data=mydata, shrinkage = 0.0005, interaction.depth=4, distribution="gaussian",n.trees=20000)
  opt=optimize(F.aac.iter,interval=c(1,20000),
               ps.model=model.den,
               data=mydata,
               ps.num=ps.num,rep=opt.reps,criterion="pearson")
  best.aac.iter=opt$minimum
  best.aac=opt$objective
  model.den$fitted=predict(model.den,newdata=mydata,n.trees=floor(best.aac.iter),type="response")   
  ps.den=dnorm((mydata$T-model.den$fitted)/sd(mydata$T-model.den$fitted),0,1)
  weight.gbm=ps.num/ps.den
  dataset=data.frame(Y=Y,X=X,T=T,w=weight.gbm)
  design.ps=svydesign(ids=~1,weights=~w,data=dataset)
  gammahat[2]=svyglm(Y ~ T, data=dataset,design = design.ps)$coef[2]
  SE[2]=summary(svyglm(Y ~ T, data=dataset,design = design.ps))$coef[2,2]
  w.out[,2]=weight.gbm

  R=list()
  R$gammahat=gammahat
  R$SE=SE
  R$w.out=w.out
  return(R)
}

# Function for D, no misspec
getD_nomis= function(X){
  N=nrow(X)
  beta_D=matrix(.2,nrow=3,ncol=1)  #general coef of X influencing D.
  Dpure=X[,1]+X[,2] + X[,3:5]%*%beta_D 
  #D=Dpure+3*rnorm(N)  # original
  D=Dpure+2*rnorm(N)  # may 17
  bal_R2_true=cor(D,Dpure)^2
  R=list()
  R$D=D
  R$bal_R2_true=bal_R2_true
return(R)
}

# Function for D, misspec
getD_mis= function(X){
  N=nrow(X)
  beta_D=matrix(.2,nrow=3,ncol=1)  #general coef of X influencing D.
  Dpure=(X[,2]+.5)^2+2*X[,3:5]%*%beta_D 
  #D=Dpure+3*rnorm(N)
  D=Dpure+1.5*rnorm(N)  # 17 May test
  bal_R2_true=cor(D,Dpure)^2
  R=list()
  R$D=D
  R$bal_R2_true=bal_R2_true
  return(R)
}

getY_nomis=function(X){
  #we'll make covariates 4,5,6 influence the outcome
  #so 4,5 are important for both but 6 influence
  # the outcome and don treatment with 3 influences
  # the treatment and not outcome.
  N=nrow(X)
  beta_Y=matrix(.1,nrow=3,ncol=1) #general coef of X influencing Y.
  Ypure=X[,2]+X[,4:6]%*%beta_Y+ATE*D  #November version
  Y=Ypure+5*rnorm(N)
  cor(Y,Ypure)^2
  R=list()
  R$Y=Y
  R$out_R2_true=cor(Y,Ypure)^2
return(R)
}

getY_mis=function(X){
  N=nrow(X)
  beta_Y=matrix(.1,nrow=3,ncol=1) #general coef of X influencing Y.
  Ypure=2*(X[,2]+.5)^2+ATE*D+5*X[,4:6]%*%beta_Y
  Y=Ypure+5*rnorm(N)
  R=list()
  R$Y=Y
  R$out_R2_true=cor(Y,Ypure)^2
  return(R)
}

###--------------------------------
### Setup 
###-------------------------------
iters=500
N=200
varstosave=c("bal_R2_true","out_R2_true",
             "bal_F_orig","bal_F_mle","bal_F_cbps_exact","bal_F_np","bal_F_zhu2",
             "bal_p_orig","bal_p_mle","bal_p_cbps_exact","bal_p_np","bal_p_zhu2",
              "ATE_orig","ATE_mle","ATE_cbps_exact","ATE_np","ATE_zhu2",
             "ATE_p_orig","ATE_p_mle","ATE_p_cbps_exact","ATE_p_np","ATE_p_zhu2")
gencov=0.2  #general covariance (and cor) among different covariates.

###-------------------------------------------
#No mis-specification
###--------------------------------------------

resultsmat=matrix(NA, nrow=iters, ncol=length(varstosave))
colnames(resultsmat)=varstosave
pb = txtProgressBar(min = 0, max = iters, style = 3)
set.seed=12345
for(j in 1:iters){
  print(paste0("Working on iter=",j))
    P=10
    ATE=1
    Sigma_sim=matrix(gencov, nrow=P, ncol=P)
    diag(Sigma_sim)=1
    X=mvrnorm(n=N, mu=rep(0,P), Sigma=Sigma_sim)
    getD.out=getD_nomis(X)
    D=getD.out$D
    resultsmat[j,"bal_R2_true"]=getD.out$bal_R2_true
    
    getY.out=getY_nomis(X)
    Y=getY.out$Y
    resultsmat[j,"out_R2_true"]=getY.out$out_R2_true
    
    pscorefit.mle<-lm(D ~ ., data=as.data.frame(X))
    mle.sigmasq<-mean((D - fitted(pscorefit.mle))^2)
    mle.stabilizers<-sapply(D, function(t) mean(dnorm(t, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))))
    pscorefit.mle.weights<-mle.stabilizers/dnorm(D, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))
    pscorefit.exact<-CBPS(D ~ ., data=as.data.frame(X), twostep = TRUE, method = "exact")
    pscorefit.np<-pscorefit.exact
    pscorefit.np$weights<- npCBPS(D~X, corprior=.1/N, print.level=1)$w

    #Get the results from Zhu (the 6 methods they offer)
    zhu.out = getzhu(T = D, Y = Y, X = X)

    #Balance check:
    bal.orig=summary(lm(D~X))
    bal.mle=summary(lm(D~X,weights=pscorefit.mle.weights))
    bal.cbps.exact=summary(lm(D~X, weights=pscorefit.exact$weights))
    bal.np=summary(lm(D~X, weights=pscorefit.np$weights))
    bal.zhu2=summary(lm(D~X, weights=zhu.out$w.out[,2]))
    
    resultsmat[j,"bal_F_orig"]=bal.orig$fstatistic[1]
    resultsmat[j,"bal_F_mle"]=bal.mle$fstatistic[1]
    resultsmat[j,"bal_F_cbps_exact"]=bal.cbps.exact$fstatistic[1]
    resultsmat[j,"bal_F_np"]=bal.np$fstatistic[1]
    resultsmat[j,"bal_F_zhu2"]=bal.zhu2$fstatistic[1]
    
    #get worst p-value
    resultsmat[j,"bal_p_orig"]=min(bal.orig$coef[-1,4])
    resultsmat[j,"bal_p_mle"]=min(bal.mle$coef[-1,4])
    resultsmat[j,"bal_p_cbps_exact"]=min(bal.cbps.exact$coef[-1,4])
    resultsmat[j,"bal_p_np"]=min(bal.np$coef[-1,4])
    resultsmat[j,"bal_p_zhu2"]=min(bal.zhu2$coef[-1,4])
    
    ### Effect estimates:
    lm.orig=summary(lm(Y~D))
    lm.mle=summary(lm(Y~D, weights=pscorefit.mle.weights))
    lm.cbps.exact=summary(lm(Y~D, weights=pscorefit.exact$weights))
    lm.np=summary(lm(Y~D, weights=pscorefit.np$weights))
    lm.zhu2=summary(lm(Y~D, weights=zhu.out$w.out[,2]))
    
    #try to get analytical SE
    pscorefit.exact<-CBPS(D ~ ., data=as.data.frame(X), twostep = TRUE, method = "exact")
    Z=cbind(rep(1,N), D)
    y.mod=lm(Y~D, weights=pscorefit.exact$weights)
    delta=y.mod$coef
    asym.vcov=vcov_outcome(pscorefit.exact, Y=Y, Z=Z, delta=delta)
    se=sqrt(asym.vcov[2,2])
    
    resultsmat[j,"ATE_orig"]=lm.orig$coef[2]
    resultsmat[j,"ATE_mle"]=lm.mle$coef[2]
    resultsmat[j,"ATE_cbps_exact"]=lm.cbps.exact$coef[2]
    resultsmat[j,"ATE_np"]=lm.np$coef[2]
    resultsmat[j,"ATE_zhu2"]=lm.zhu2$coef[2]

    resultsmat[j,"ATE_p_orig"]=lm.orig$coef[2,4]
    resultsmat[j,"ATE_p_mle"]=lm.mle$coef[2,4]
    resultsmat[j,"ATE_p_cbps_exact"]=lm.cbps.exact$coef[2,4]
    resultsmat[j,"ATE_p_np"]=lm.np$coef[2,4]
    resultsmat[j,"ATE_p_zhu2"]=lm.zhu2$coef[2,4]
    setTxtProgressBar(pb, j)
} #end iters loop

close(pb)

#Get the MSE from the ATE estimates:
ATEresults=resultsmat[,c("ATE_orig","ATE_mle","ATE_cbps_exact",
                         "ATE_np","ATE_zhu2")]
rmse.nomis=apply(X = ATEresults, MARGIN=2, function(x) mean((x-ATE)^2)^.5)
bias.nomis=apply(X = ATEresults, MARGIN=2, function(x) mean(x-ATE))

###----------------------------------------------------------
# Mis-specification of pscore, but outcome still linear in X.
###-----------------------------------------------------------
resultsmat2=matrix(NA, nrow=iters, ncol=length(varstosave))
colnames(resultsmat2)=varstosave

pb = txtProgressBar(min = 0, max = iters, style = 3)
for(j in 1:iters){
  P=10
  ATE=1
  Sigma_sim=matrix(gencov, nrow=P, ncol=P)
  diag(Sigma_sim)=1
  X=mvrnorm(n=N, mu=rep(0,P), Sigma=Sigma_sim)
  
  getD.out=getD_mis(X)
  D=getD.out$D
  resultsmat2[j,"bal_R2_true"]=getD.out$bal_R2_true
  
  getY.out=getY_nomis(X)
  Y=getY.out$Y
  resultsmat2[j,"out_R2_true"]=getY.out$out_R2_true
  
  pscorefit.mle<-lm(D ~ ., data=as.data.frame(X))
  mle.sigmasq<-mean((D - fitted(pscorefit.mle))^2)
  mle.stabilizers<-sapply(D, function(t) mean(dnorm(t, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))))
  pscorefit.mle.weights<-mle.stabilizers/dnorm(D, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))

  pscorefit.exact<-CBPS(D ~ ., data=as.data.frame(X), twostep = TRUE, method = "exact")
  pscorefit.np<-pscorefit.exact
  pscorefit.np$weights<- npCBPS(D~X, corprior=.1/N, print.level=1)$w
  zhu.out = getzhu(T = D, Y = Y, X = X)
  
  #Balance check:
  bal.orig=summary(lm(D~X))
  bal.mle=summary(lm(D~X,weights=pscorefit.mle.weights))
  bal.cbps.exact=summary(lm(D~X, weights=pscorefit.exact$weights))
  bal.np=summary(lm(D~X, weights=pscorefit.np$weights))
  bal.zhu2=summary(lm(D~X, weights=zhu.out$w.out[,2]))
  
  resultsmat2[j,"bal_F_orig"]=bal.orig$fstatistic[1]
  resultsmat2[j,"bal_F_mle"]=bal.mle$fstatistic[1]
  resultsmat2[j,"bal_F_cbps_exact"]=bal.cbps.exact$fstatistic[1]
  resultsmat2[j,"bal_F_np"]=bal.np$fstatistic[1]
  resultsmat2[j,"bal_F_zhu2"]=bal.zhu2$fstatistic[1]  
  
  #get worst p-value
  resultsmat2[j,"bal_p_orig"]=min(bal.orig$coef[-1,4])
  resultsmat2[j,"bal_p_mle"]=min(bal.mle$coef[-1,4])
  resultsmat2[j,"bal_p_cbps_exact"]=min(bal.cbps.exact$coef[-1,4])
  resultsmat2[j,"bal_p_np"]=min(bal.np$coef[-1,4])
  resultsmat2[j,"bal_p_zhu2"]=min(bal.zhu2$coef[-1,4])  

  ### Effect estimates:
  lm.orig=summary(lm(Y~D))
  lm.mle=summary(lm(Y~D, weights=pscorefit.mle.weights))
  lm.cbps.exact=summary(lm(Y~D, weights=pscorefit.exact$weights))
  lm.np=summary(lm(Y~D, weights=pscorefit.np$weights))
  lm.zhu2=summary(lm(Y~D, weights=zhu.out$w.out[,2]))
  
  resultsmat2[j,"ATE_orig"]=lm.orig$coef[2]
  resultsmat2[j,"ATE_mle"]=lm.mle$coef[2]
  resultsmat2[j,"ATE_cbps_exact"]=lm.cbps.exact$coef[2]
  resultsmat2[j,"ATE_np"]=lm.np$coef[2]
  resultsmat2[j,"ATE_zhu2"]=lm.zhu2$coef[2]
  
  resultsmat2[j,"ATE_p_orig"]=lm.orig$coef[2,4]
  resultsmat2[j,"ATE_p_mle"]=lm.mle$coef[2,4]
  resultsmat2[j,"ATE_p_cbps_exact"]=lm.cbps.exact$coef[2,4]
  resultsmat2[j,"ATE_p_np"]=lm.np$coef[2,4]
  resultsmat2[j,"ATE_p_zhu2"]=lm.zhu2$coef[2,4]
  
  setTxtProgressBar(pb, j)
} #end iters loop

close(pb)

#Get the MSE from the ATE estimates:
ATEresults=resultsmat2[,c("ATE_orig","ATE_mle","ATE_cbps_exact",
                         "ATE_np","ATE_zhu2")]
rmse.misD=apply(X = ATEresults, MARGIN=2, function(x) mean((x-ATE)^2)^.5)
bias.misD=apply(X = ATEresults, MARGIN=2, function(x) mean(x-ATE))


###----------------------------------------------------------
# Mis-specification of outcome, but pscore right.
###-----------------------------------------------------------
resultsmat3=matrix(NA, nrow=iters, ncol=length(varstosave))
colnames(resultsmat3)=varstosave

pb = txtProgressBar(min = 0, max = iters, style = 3)
for(j in 1:iters){
  P=10
  ATE=1
  Sigma_sim=matrix(gencov, nrow=P, ncol=P)
  diag(Sigma_sim)=1
  X=mvrnorm(n=N, mu=rep(0,P), Sigma=Sigma_sim)
  
  getD.out=getD_nomis(X)
  D=getD.out$D
  resultsmat3[j,"bal_R2_true"]=getD.out$bal_R2_true
  
  getY.out=getY_mis(X)
  Y=getY.out$Y
  resultsmat3[j,"out_R2_true"]=getY.out$out_R2_true
  
  pscorefit.mle<-lm(D ~ ., data=as.data.frame(X))
  mle.sigmasq<-mean((D - fitted(pscorefit.mle))^2)
  mle.stabilizers<-sapply(D, function(t) mean(dnorm(t, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))))
  pscorefit.mle.weights<-mle.stabilizers/dnorm(D, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))

  pscorefit.exact<-CBPS(D ~ ., data=as.data.frame(X), twostep = TRUE, method = "exact")
  pscorefit.np<-pscorefit.exact
  
  pscorefit.np$weights<- npCBPS(D~X, corprior=.1/N, print.level=1)$w
  zhu.out = getzhu(T = D, Y = Y, X = X)
  
  #Balance check:
  bal.orig=summary(lm(D~X))
  bal.mle=summary(lm(D~X,weights=pscorefit.mle.weights))
  bal.cbps.exact=summary(lm(D~X, weights=pscorefit.exact$weights))
  bal.np=summary(lm(D~X, weights=pscorefit.np$weights))
  bal.zhu2=summary(lm(D~X, weights=zhu.out$w.out[,2]))
  
  resultsmat3[j,"bal_F_orig"]=bal.orig$fstatistic[1]
  resultsmat3[j,"bal_F_mle"]=bal.mle$fstatistic[1]
  resultsmat3[j,"bal_F_cbps_exact"]=bal.cbps.exact$fstatistic[1]
  resultsmat3[j,"bal_F_np"]=bal.np$fstatistic[1]
  resultsmat3[j,"bal_F_zhu2"]=bal.zhu2$fstatistic[1]  
  
  #get worst p-value
  resultsmat3[j,"bal_p_orig"]=min(bal.orig$coef[-1,4])
  resultsmat3[j,"bal_p_mle"]=min(bal.mle$coef[-1,4])
  resultsmat3[j,"bal_p_cbps_exact"]=min(bal.cbps.exact$coef[-1,4])
  resultsmat3[j,"bal_p_np"]=min(bal.np$coef[-1,4])
  resultsmat3[j,"bal_p_zhu2"]=min(bal.zhu2$coef[-1,4])  
  
  ### Effect estimates:
  lm.orig=summary(lm(Y~D))
  lm.mle=summary(lm(Y~D, weights=pscorefit.mle.weights))
  lm.cbps.exact=summary(lm(Y~D, weights=pscorefit.exact$weights))
  lm.np=summary(lm(Y~D, weights=pscorefit.np$weights))
  lm.zhu2=summary(lm(Y~D, weights=zhu.out$w.out[,2]))
  
  resultsmat3[j,"ATE_orig"]=lm.orig$coef[2]
  resultsmat3[j,"ATE_mle"]=lm.mle$coef[2]
  resultsmat3[j,"ATE_cbps_exact"]=lm.cbps.exact$coef[2]
  resultsmat3[j,"ATE_np"]=lm.np$coef[2]
  resultsmat3[j,"ATE_zhu2"]=lm.zhu2$coef[2]
  
  resultsmat3[j,"ATE_p_orig"]=lm.orig$coef[2,4]
  resultsmat3[j,"ATE_p_mle"]=lm.mle$coef[2,4]
  resultsmat3[j,"ATE_p_cbps_exact"]=lm.cbps.exact$coef[2,4]
  resultsmat3[j,"ATE_p_np"]=lm.np$coef[2,4]
  resultsmat3[j,"ATE_p_zhu2"]=lm.zhu2$coef[2,4]
  
  setTxtProgressBar(pb, j)
} #end iters loop

close(pb)

#Get the MSE from the ATE estimates:
ATEresults=resultsmat3[,c("ATE_orig","ATE_mle","ATE_cbps_exact",
                          "ATE_np","ATE_zhu2")]
rmse.misY=apply(X = ATEresults, MARGIN=2, function(x) mean((x-ATE)^2)^.5)
bias.misY=apply(X = ATEresults, MARGIN=2, function(x) mean(x-ATE))

###----------------------------------------------------------
# Mis-specification of both treatment and outcome.
###-----------------------------------------------------------
resultsmat4=matrix(NA, nrow=iters, ncol=length(varstosave))
colnames(resultsmat4)=varstosave

pb = txtProgressBar(min = 0, max = iters, style = 3)
for(j in 1:iters){
  P=10
  ATE=1
  Sigma_sim=matrix(gencov, nrow=P, ncol=P)
  diag(Sigma_sim)=1
  X=mvrnorm(n=N, mu=rep(0,P), Sigma=Sigma_sim)
  
  getD.out=getD_mis(X)
  D=getD.out$D
  resultsmat4[j,"bal_R2_true"]=getD.out$bal_R2_true
  
  getY.out=getY_mis(X)
  Y=getY.out$Y
  resultsmat4[j,"out_R2_true"]=getY.out$out_R2_true
  
  pscorefit.mle<-lm(D ~ ., data=as.data.frame(X))
  mle.sigmasq<-mean((D - fitted(pscorefit.mle))^2)
  mle.stabilizers<-sapply(D, function(t) mean(dnorm(t, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))))
  pscorefit.mle.weights<-mle.stabilizers/dnorm(D, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))

  pscorefit.exact<-CBPS(D ~ ., data=as.data.frame(X), twostep = TRUE, method = "exact")
  pscorefit.np<-pscorefit.exact
  pscorefit.np$weights<- npCBPS(D~X, corprior=.1/N, print.level=1)$w
  zhu.out = getzhu(T = D, Y = Y, X = X)
  
  #Balance check:
  bal.orig=summary(lm(D~X))
  bal.mle=summary(lm(D~X,weights=pscorefit.mle.weights))
  bal.cbps.exact=summary(lm(D~X, weights=pscorefit.exact$weights))
  bal.np=summary(lm(D~X, weights=pscorefit.np$weights))
  bal.zhu2=summary(lm(D~X, weights=zhu.out$w.out[,2]))
  
  resultsmat4[j,"bal_F_orig"]=bal.orig$fstatistic[1]
  resultsmat4[j,"bal_F_mle"]=bal.mle$fstatistic[1]
  resultsmat4[j,"bal_F_cbps_exact"]=bal.cbps.exact$fstatistic[1]
  resultsmat4[j,"bal_F_np"]=bal.np$fstatistic[1]
  resultsmat4[j,"bal_F_zhu2"]=bal.zhu2$fstatistic[1]  
  
  #get worst p-value
  resultsmat4[j,"bal_p_orig"]=min(bal.orig$coef[-1,4])
  resultsmat4[j,"bal_p_mle"]=min(bal.mle$coef[-1,4])
  resultsmat4[j,"bal_p_cbps_exact"]=min(bal.cbps.exact$coef[-1,4])
  resultsmat4[j,"bal_p_np"]=min(bal.np$coef[-1,4])
  resultsmat4[j,"bal_p_zhu2"]=min(bal.zhu2$coef[-1,4])  
  
  ### Effect estimates:
  lm.orig=summary(lm(Y~D))
  lm.mle=summary(lm(Y~D, weights=pscorefit.mle.weights))
  lm.cbps.exact=summary(lm(Y~D, weights=pscorefit.exact$weights))
  lm.np=summary(lm(Y~D, weights=pscorefit.np$weights))
  lm.zhu2=summary(lm(Y~D, weights=zhu.out$w.out[,2]))
  
  resultsmat4[j,"ATE_orig"]=lm.orig$coef[2]
  resultsmat4[j,"ATE_mle"]=lm.mle$coef[2]
  resultsmat4[j,"ATE_cbps_exact"]=lm.cbps.exact$coef[2]
  resultsmat4[j,"ATE_np"]=lm.np$coef[2]
  resultsmat4[j,"ATE_zhu2"]=lm.zhu2$coef[2]
  
  resultsmat4[j,"ATE_p_orig"]=lm.orig$coef[2,4]
  resultsmat4[j,"ATE_p_mle"]=lm.mle$coef[2,4]
  resultsmat4[j,"ATE_p_cbps_exact"]=lm.cbps.exact$coef[2,4]
  resultsmat4[j,"ATE_p_np"]=lm.np$coef[2,4]
  resultsmat4[j,"ATE_p_zhu2"]=lm.zhu2$coef[2,4]
  
  setTxtProgressBar(pb, j)
} #end iters loop

close(pb)

#Get the MSE from the ATE estimates:
ATEresults=resultsmat4[,c("ATE_orig","ATE_mle","ATE_cbps_exact",
                          "ATE_np","ATE_zhu2")]
rmse.misboth=apply(X = ATEresults, MARGIN=2, function(x) mean((x-ATE)^2)^.5)
bias.misboth=apply(X = ATEresults, MARGIN=2, function(x) mean(x-ATE))

#save.image(file="SimulationWorkspace.RData")

###----------------------------------------------------
# Graphics in paper.
###---------------------------------------------------

#load("SimulationWorkspace.RData")

nameset=c("Unweighted","MLE","GBM","CBGPS","npCBGPS")

## F-stat plots re: balance:
resultsmat.nolog=resultsmat[,c("bal_F_orig","bal_F_mle","bal_F_zhu2","bal_F_cbps_exact","bal_F_np")]
resultsmat.log=log10(resultsmat[,c("bal_F_orig","bal_F_mle","bal_F_zhu2","bal_F_cbps_exact","bal_F_np")]+1)

resultsmat2.log=log10(resultsmat2[,c("bal_F_orig","bal_F_mle","bal_F_zhu2","bal_F_cbps_exact","bal_F_np")]+1)
resultsmat2.nolog=resultsmat2[,c("bal_F_orig","bal_F_mle","bal_F_zhu2","bal_F_cbps_exact","bal_F_np")]

#pdf(width=8, height=6, "Bal_F_sim.pdf")
par(mfrow=c(1,2))
par(mar=c(7,4,4,2))
ymax=2.85
goody=c(0,1,10,50,100,500)
boxplot.matrix(ylab="F-stat from Model for T|X",frame.plot=FALSE,cex.axis=1, las=1,
               ylim=c(0,ymax), cex.lab=1, cex.axis=1.2, resultsmat.log, names=nameset, yaxt="n",
               xaxt="n")
axis(2, at=log10(goody+1), label=goody) 
axis(1, at=seq(1,5), labels=nameset,las=2)
title(cex.main=1,"Correct E[T|X]")
abline(h=0, col=2, lty=2)

boxplot.matrix(ylab="F-stat from Model for T|X",ylim=c(0,ymax), frame.plot=FALSE,
               resultsmat2.log,names=nameset, yaxt="n", cex.axis=1.2, xaxt="n")
axis(2, at=log10(goody+1), labels=goody) 
axis(1, at=seq(1,5), labels=nameset, las=2)

title(cex.main=1,"Incorrect E[T|X]")
abline(h=0, col=2, lty=2)
dev.off()

###--------------------------------------
# ATE estimates
###-------------------------------------
#pdf(width=9, height=9, "ATE_sim.pdf")
par(mfrow=c(2,2))
ylims=c(-1.5,4.56)
bias_height=4
rmse_height=3.5
par(mar=c(7,4,4,2))

boxplot.matrix(ylab="ATE Estimate",frame.plot=FALSE,ylim=ylims, cex.axis=1,
    resultsmat[,c("ATE_orig","ATE_mle","ATE_zhu2","ATE_cbps_exact","ATE_np")], 
    names=nameset,las=2)
title(cex.main=1.2,"Correct E[T|X], linear Y|X")
abline(h=ATE, col=2, lty=2)
text(x=.6,y=bias_height, "bias:", cex=.9)
text(x=.1+seq(1,5), y=bias_height, labels = round(bias.nomis[c(1,2,5,3,4)],3), cex=.9)
text(x=.6,y=rmse_height, "rmse:", cex=.9)
text(x=.1+seq(1,5), y=rmse_height, labels = round(rmse.nomis[c(1,2,5,3,4)],3), cex=.9)


boxplot.matrix(ylab="ATE Estimate", ylim=ylims, frame.plot=FALSE,resultsmat2[,
     c("ATE_orig","ATE_mle","ATE_zhu2","ATE_cbps_exact","ATE_np")],
     names=nameset, las=2)

title(cex.main=1.2,"Incorrect E[T|X], linear Y|X")
abline(h=ATE, col=2, lty=2)
text(x=.6,y=bias_height, "bias:", cex=.9)
text(x=.1+seq(1,5), y=bias_height, labels = round(bias.misD[c(1,2,5,3,4)],3), cex=.9)
text(x=.6,y=rmse_height, "rmse:", cex=.9)
text(x=.1+seq(1,5), y=rmse_height, labels = round(rmse.misD[c(1,2,5,3,4)],3), cex=.9)

boxplot.matrix(ylab="ATE Estimate", ylim=ylims, cex.axis=1, frame.plot=FALSE, resultsmat3[,
    c("ATE_orig","ATE_mle","ATE_zhu2","ATE_cbps_exact","ATE_np")],
    names=nameset, las=2)
title(cex.main=1.2,"Correct E[T|X], non-linear Y|X")
abline(h=ATE, col=2, lty=2)
text(x=.6,y=bias_height, "bias:", cex=.9)
text(x=.1+seq(1,5), y=bias_height, labels = round(bias.misY[c(1,2,5,3,4)],3), cex=.9)
text(x=.6,y=rmse_height, "rmse:", cex=.9)
text(x=.1+seq(1,5), y=rmse_height, labels = round(rmse.misY[c(1,2,5,3,4)],3), cex=.9)

boxplot.matrix(ylab="ATE Estimate", ylim=ylims, cex.axis=1, frame.plot=FALSE,resultsmat4[,
  c("ATE_orig","ATE_mle","ATE_zhu2","ATE_cbps_exact","ATE_np")],
   names=nameset, las=2)
title(cex.main=1.2,"Incorrect E[T|X], non-linear Y|X")
abline(h=ATE, col=2, lty=2)
text(x=.6,y=-.4, "bias:", cex=.9)
text(x=.1+seq(1,5), y=-.4, labels = round(bias.misboth[c(1,2,5,3,4)],3), cex=.9)
text(x=.6,y=-.9, "rmse:", cex=.9)
text(x=.1+seq(1,5), y=-.9, labels = round(rmse.misboth[c(1,2,5,3,4)],3), cex=.9)
dev.off()